---
title: "Simulation"
author: "Kohei Watanabe"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output: html_document
---

```{r setup, include=FALSE}
# Chunk defaults for the whole report: echo the source of every chunk and
# collapse source and text output into a single block.
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE)
# Load project helper functions.
# NOTE(review): presumably this defines plot_path2(), which is called in a
# later chunk -- confirm against functions.R.
source("functions.R")
```

## Random draw

```{r fig.width=6, fig.height=6}
# Read the random-draw simulation results and stack the list elements
# row-wise into a single data frame.
simu_rand <- readRDS("data_simulation_random.RDS")
dat_rand <- do.call(rbind, simu_rand)

# Scatter plot of f1 against d2; the point colour is taken from n, and the
# legend lists each distinct value of n with its colour.
par(mar = c(4.1, 4.1, 1.1, 1.1))
n_values <- unique(dat_rand$n)
plot(
  x = dat_rand$d2,
  y = dat_rand$f1,
  col = dat_rand$n,
  pch = 1,
  xlab = "Coverage",
  ylab = "F1"
)
grid()
legend("bottomright", legend = n_values, col = n_values, pch = 1)

# Correlation between d2 and f1 (printed in the report).
cor(dat_rand$d2, dat_rand$f1)

# Fit f1 as a quadratic function of d2 and overlay the fitted curve,
# evaluated on a regular grid of d2 values from 0 to 2.
reg <- lm(f1 ~ I(d2 ^ 2) + d2, data = dat_rand)
dat_dummy <- data.frame(d2 = seq(0, 2, by = 0.01))
dat_dummy$fit1 <- predict(reg, newdata = dat_dummy)
lines(dat_dummy$d2, dat_dummy$fit1)
```

## Random path

```{r}
# Read the simulated paths and keep only the last row of each one,
# stacked into a single data frame.
simu_path <- readRDS("data_simulation_path.RDS")
lis_last <- lapply(simu_path, function(path) tail(path, n = 1))
dat_path <- do.call(rbind, lis_last)

# Summary statistics of f1 in the last row of every path (each one is
# printed in the report).
f1_last <- dat_path$f1
min(f1_last)
max(f1_last)
mean(f1_last)
sd(f1_last)
```


```{r fig.width=10, fig.height=8}
# Plot the five paths whose final f1 lies closest to the mean final f1,
# printing the index of each path before its plot.
dist_mean <- abs(dat_path$f1 - mean(dat_path$f1))
idx_closest <- head(order(dist_mean), n = 5)
for (i in idx_closest) {
  cat(i, "\n")
  plot_path2(simu_path[[i]])
}
```

```{r}
# Step-to-step changes along every simulated path.
# For each path, e2 and d2 are first expressed relative to their value in the
# first row (ratio minus one) and then differenced between consecutive rows;
# f1 is differenced directly. The e2 and d2 columns are finally scaled by 100
# so that they read as percentage changes.
simu_path <- readRDS("data_simulation_path.RDS")

# Build one data frame per path and bind them in a single call instead of
# growing the result with rbind() inside a loop, which copies the accumulated
# data frame on every iteration. Names are dropped so that the combined data
# frame keeps automatic row names.
lis_diff <- lapply(unname(simu_path), function(path) {
  data.frame(e2 = diff((path$e2 / path$e2[1]) - 1),
             d2 = diff((path$d2 / path$d2[1]) - 1),
             f1 = diff(path$f1))
})
# The leading empty data frame guarantees a data frame result (rather than
# NULL) even when there are no paths at all.
dat_diff <- do.call(rbind, c(list(data.frame()), lis_diff))
dat_diff$e2 <- dat_diff$e2 * 100
dat_diff$d2 <- dat_diff$d2 * 100
```

```{r fig.width=7, fig.height=7}
# Scatter plot of the step-wise change in f1 against the percentage change in
# e2. The symbol size is the percentage change in d2 divided by 10, which is
# what the legend entry refers to.
par(mar = c(4.1, 4.1, 1.1, 1.1))
size_d2 <- dat_diff$d2 / 10
plot(
  x = dat_diff$e2,
  y = dat_diff$f1,
  cex = size_d2,
  xlab = "AFE (% diff)",
  ylab = "F1 (diff)"
)
grid()
# Dotted vertical reference line at zero change in e2.
abline(v = 0, lty = 3)
# Least-squares line of f1 on e2.
reg_diff <- lm(f1 ~ e2, data = dat_diff)
abline(reg_diff)
legend("topright", legend = "Coverage (% diff)", pch = 1)
```

```{r}
# Correlation test between the change in e2 and the change in f1.
cor.test(dat_diff$e2, dat_diff$f1)

# Cross-tabulate "e2 increased" (rows) against "f1 decreased" (columns) and
# test the two for independence.
tb <- table(dat_diff$e2 > 0, dat_diff$f1 < 0)
chi <- chisq.test(tb)
chi

# Row-wise proportions of the observed counts, then the ratio of the
# second-column proportion in the second row to that in the first row
# (share of f1 decreases when e2 increased vs. when it did not).
prob <- prop.table(chi$observed, margin = 1)
prob[2, 2] / prob[1, 2]
```

```{r}
# Load the selection results; later chunks read its e2_diff, f1_diff,
# word_added and topic_added columns.
dat_select <- readRDS("data_simulation_selection.RDS")
# Load the summary table that is rendered with knitr::kable() further down.
dat_summ <- readRDS("data_simulation_summary.RDS")
```

```{r, asis=TRUE}
# List, for each value of topic_added, the words whose e2_diff is positive,
# joined into one comma-separated string per topic.
# NOTE(review): the chunk header sets `asis=TRUE`, which is not a built-in
# knitr chunk option; `results = "asis"` may have been intended -- confirm.
dat_remove <- subset(dat_select, e2_diff > 0)
lis_remove <- split(x = dat_remove$word_added, f = dat_remove$topic_added)
lapply(lis_remove, function(words) paste(words, collapse = ", "))
```

```{r, asis=TRUE}
# Render the summary data frame as a table, rounding numbers to 3 digits.
# NOTE(review): the chunk header sets `asis=TRUE`, which is not a built-in
# knitr chunk option; `results = "asis"` may have been intended -- confirm.
knitr::kable(dat_summ, digits = 3)
```

```{r fig.width=7, fig.height=7}
# Plot each added word at its (e2_diff, f1_diff) position: an empty frame is
# drawn first and the words are then written as semi-transparent black labels.
par(mar = c(4.1, 4.1, 1.1, 1.1))
x_e2 <- dat_select$e2_diff
y_f1 <- dat_select$f1_diff
plot(x_e2, y_f1, type = "n", xlab = "AFE (% diff)", ylab = "F1 (diff)")
text(
  x_e2,
  y_f1,
  labels = dat_select$word_added,
  col = rgb(0, 0, 0, alpha = 0.6)
)
grid()
# Dotted vertical reference line at zero change in e2.
abline(v = 0, lty = 3)
```

